import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go
from ml_core.utils import Normalization, DataProcess
from ml_core.regression import GradientDescent, OrdinaryLeastSquares, StochasticGradientDescent
from sklearn.model_selection import train_test_split
from ml_core.metrics import ErrorMetrics
# Load the 1-D artificial dataset; the CSV has no header row, so the two
# columns are named explicitly.
df = pd.read_csv('data/artificial1d.csv', header=None, names=['x', 'y'])
df.dtypes
# Notebook output of the expression above (kept for reference; as bare text it
# was a syntax error):
#   x float64 y float64 dtype: object

# z_score_normalization returns a pair. For `y` the second element is kept as
# `denormalized_y` and is later called on model predictions.
# NOTE(review): presumably it maps normalized values back to the original
# scale -- confirm against ml_core.utils.Normalization.
normalized_x, _ = Normalization.z_score_normalization(df['x'].to_numpy())
normalized_y, denormalized_y = Normalization.z_score_normalization(df['y'].to_numpy())

# Design matrix and target used by every 1-D model fitted below.
# NOTE(review): add_ones_column presumably adds a bias column and
# reshape_vector presumably yields a column vector -- confirm in ml_core.
X_ones = DataProcess.add_ones_column(normalized_x)
y = DataProcess.reshape_vector(normalized_y)
def show_rmse_curve(history, title='RMSE Curve'):
    """Build a line chart of the RMSE recorded at each training step.

    Args:
        history: Sized sequence of per-step records; each record must expose
            its error under the ``'rmse_error'`` key.
        title: Title shown on the figure.

    Returns:
        The figure produced by ``plotly.express.line``, with the step index
        on the x axis and the RMSE on the y axis.
    """
    # Step numbers are simply the positions of the records in `history`.
    steps = list(range(len(history)))
    rmse_values = []
    for record in history:
        rmse_values.append(record['rmse_error'])
    axis_labels = {'x': 'steps', 'y': 'RMSE'}
    return px.line(x=steps, y=rmse_values, labels=axis_labels, title=title)
def show_final_result(history,
                      denormalized_function=denormalized_y,
                      title='Model Result',
                      x=df['x'],
                      y=df['y'],
                      test_matrix=X_ones):
    """Plot the observed points together with the final model's predictions.

    Args:
        history: Either a list of training records (the model stored under
            the ``'model'`` key of the last record is used) or a fitted model
            object exposing ``predict``.
        denormalized_function: Callable applied to the raw predictions before
            plotting. Defaults to ``denormalized_y`` as bound when this
            function is defined.
        title: Title shown on the figure.
        x: Values for the x axis of both the scatter and the prediction trace.
        y: Observed values for the scatter plot.
        test_matrix: Matrix passed to ``predict``.

    Returns:
        A plotly figure containing the scatter of ``(x, y)`` and one extra
        trace with the predictions.
    """
    final_model = history[-1]['model'] if isinstance(history, list) else history

    # Use the function supplied by the caller. The previous code always called
    # the global `denormalized_y`, silently ignoring this parameter.
    preds = denormalized_function(final_model.predict(test_matrix))

    fig = px.scatter(x=x, y=y, title=title)
    # The predictions are indexed as a 2-D array; its first column is plotted.
    fig.add_trace(go.Scatter(x=x, y=preds[:, 0]))
    return fig
# --- Gradient descent on the 1-D data ---------------------------------------
gd = GradientDescent(ephocs=1000, with_history_predictions=True, l2_regulazation=0.001)
history_gd = gd.fit(X_ones, y)
# Notebook output (progress bar, kept for reference; as bare text it was a
# syntax error):
#   0%| | 0/1000 [00:00<?, ?it/s]
show_rmse_curve(history_gd)
show_final_result(history_gd)

# --- Ordinary least squares on the 1-D data ---------------------------------
ols = OrdinaryLeastSquares(ephocs=1000, with_history_predictions=True)
model_ols = ols.fit(X_ones, y)
show_final_result(model_ols)

# --- Stochastic gradient descent on the 1-D data ----------------------------
gde = StochasticGradientDescent(ephocs=100, with_history_predictions=True, l2_regulazation=0.001)
history_gde = gde.fit(X_ones, y)
# Notebook output (progress bar, kept for reference; as bare text it was a
# syntax error):
#   0%| | 0/100 [00:00<?, ?it/s]
show_rmse_curve(history_gde)
show_final_result(history_gde)

# --- OLS on polynomial features ---------------------------------------------
# The same `ols` instance is refit on the expanded matrix; the plot must be
# given the same matrix so predictions are computed on matching features.
X_pow = DataProcess.generate_polynomial_order(X_ones, 11)
model_p_ols = ols.fit(X_pow, y)
show_final_result(model_p_ols, test_matrix=X_pow)
# Load data/california.csv (no header row) and split it into the first eight
# columns (features) and the column at index 8 (target).
df_p = pd.read_csv('data/california.csv', header=None)
arr = df_p.to_numpy()
X_p = arr[:, :8]
y_p = DataProcess.reshape_vector(arr[:, 8])

# Normalize each feature column independently; only the normalized values are
# kept, the second element of each returned pair is discarded.
X_norm = np.empty_like(X_p)
for idx in range(X_p.shape[1]):
    col = X_p[:, idx]
    norm, _ = Normalization.z_score_normalization(col)
    X_norm[:, idx] = norm

# For the target, the second element is kept: it is later called on targets
# and predictions before the RMSE is computed.
y_norm, denormalized_y_p = Normalization.z_score_normalization(y_p)
def my_greate_test(max_order, l2_reg=0):
    """Compare train and test RMSE of OLS across polynomial orders.

    The module-level ``X_norm`` / ``y_norm`` are split once (20% held out,
    ``random_state=42``). For every order in ``1 .. max_order - 1`` an
    ``OrdinaryLeastSquares`` model is fitted on the polynomial expansion of
    the training split. Targets and predictions are passed through
    ``denormalized_y_p`` before ``ErrorMetrics.rmse`` is computed.

    Args:
        max_order: Exclusive upper bound of the polynomial orders evaluated.
        l2_reg: Value forwarded as ``l2_regulazation`` to the regressor.

    Returns:
        A ``plotly.graph_objs.Figure`` with two traces, ``'Train RMSE'`` and
        ``'Test RMSE'``, plotted against the polynomial order.
    """
    orders = list(range(1, max_order))
    regressor = OrdinaryLeastSquares(l2_regulazation=l2_reg)
    X_trn, X_tst, y_trn, y_tst = train_test_split(
        X_norm, y_norm, test_size=0.2, random_state=42
    )

    def _rmse(targets, predictions):
        # Both arguments go through denormalized_y_p before the error is taken.
        return ErrorMetrics.rmse(
            denormalized_y_p(targets),
            denormalized_y_p(predictions),
        )

    rmse_erros_train = []
    rmse_erros_test = []
    for degree in orders:
        train_features = DataProcess.generate_polynomial_order(X_trn, degree, with_bias=True)
        test_features = DataProcess.generate_polynomial_order(X_tst, degree, with_bias=True)

        fitted = regressor.fit(train_features, y_trn)
        train_predictions = fitted.predict(train_features)
        test_predictions = fitted.predict(test_features)

        rmse_erros_train.append(_rmse(y_trn, train_predictions))
        rmse_erros_test.append(_rmse(y_tst, test_predictions))

    train_trace = go.Scatter(x=orders, y=rmse_erros_train, name='Train RMSE')
    test_trace = go.Scatter(x=orders, y=rmse_erros_test, name='Test RMSE')
    return go.Figure(data=[train_trace, test_trace])
# Polynomial-order sweep with the default l2_reg (0).
fig = my_greate_test(max_order=11)
fig

# Same sweep with l2_reg=0.001.
fig = my_greate_test(max_order=11, l2_reg=0.001)
fig